## Gawande, Krishna, and Olarreaga 2009 IO

library(foreign) 
library(Amelia)

## Load original dataset and take a first look at it
gko2009 <- read.dta("GKO2009 IO Rep Data.dta")
head(gko2009)
dim(gko2009)
summary(gko2009)

## Columns that are not carried forward, grouped by the reason for
## dropping them.  Deleting a column that is not present is a no-op.

## ID vars
id_vars <- c("isic", "cname", "country", "name")

## Character vars
character_vars <- c(
    "execme", "toppme", "mdms", "thresh", "descr", "fgovme", "fgovrlc",
    "soppme", "execrlc", "execrel", "nonchief", "fgovrurl", "fgovrel",
    "sgovme", "sgovrlc", "sgovrel", "tgovme", "tgovrlc", "tgovrel",
    "foppme", "fopprlc", "fopprel", "mdmh", "housesys", "fgovurl", "pr",
    "pluralty"
)

## Derived dummy vars: isic1 ... isic28 and y11 ... y19, y110 ... y113
dummy_vars <- c(paste0("isic", 1:28), paste0("y1", 1:13))

## Constituent terms (not in analysis model): none listed

## Vars with no variation
constant_vars <- c("select", "ve", "vbar", "ebar")

## Calculated parameters
parameter_vars <- c(
    "betael", "p_ct", "p_ctr", "p_ctu", "pg1", "pg2", "sde", "sebetael",
    "seelt", "sel", "sesq", "ssh", "stconst", "msesq"
)

## Imputed vars
imputed_vars <- c("vaf")

## Remove every listed column from the data frame, one at a time
for (unused in c(id_vars, character_vars, dummy_vars, constant_vars,
                 parameter_vars, imputed_vars)) {
    gko2009[[unused]] <- NULL
}

## Drop the "_merge" column.
## The column is removed at the position found by name instead of at a
## hard-coded index, so a change in column order or count cannot cause a
## different variable to be deleted by mistake.
merge_col <- which(colnames(gko2009) == "_merge")
merge_col
## Guard against an empty match: a negative index of length zero would
## select no columns at all rather than leave the data untouched.
if (length(merge_col) > 0) {
    gko2009 <- gko2009[, -merge_col, drop = FALSE]
}

## How many variables? 176 when this was written: reduction necessary
dim(gko2009)

## Which variables are in analysis and have missing data?
## Each line prints the percentage of missing values for one variable.
sum(is.na(gko2009[["left3"]])) / nrow(gko2009) * 100 ## Y
sum(is.na(gko2009[["ccode"]])) / nrow(gko2009) * 100 ## N
sum(is.na(gko2009[["va"]])) / nrow(gko2009) * 100 ## Y

## Values of the two analysis variables: column V1 is va, V2 is left3
analysis <- as.data.frame(
    cbind(gko2009[["va"]], gko2009[["left3"]])
)

## Matching indicators in the same column order:
## 1 where the value is observed, 0 where it is NA
missing <- as.data.frame(
    cbind(
        as.integer(!is.na(gko2009[["va"]])),
        as.integer(!is.na(gko2009[["left3"]]))
    )
)

dim(analysis)
dim(missing)
## Standard deviation of each indicator column
vapply(missing, sd, numeric(1))

## Remove analysis variables
## gko2009 $left3 <- NULL
## head(gko2009)
## var(gko2009)

## Check correlations and missing values
##
## Every candidate auxiliary variable is screened the same way.  For each
## one the script prints
##   1. its correlation with the analysis variables (`analysis`),
##   2. its correlation with their observed-value indicators (`missing`),
##   3. its percentage of missing values.
## Candidates named in `retained` stay in the data (they were marked "Y"
## after inspection); all other candidates are deleted once reported (they
## were marked "N").
##
## `prodindx` was previously screened a second time further down, after the
## column had already been deleted, which made cor() fail on a NULL input.
## It is listed once here, and any candidate that is absent from the data
## is skipped with a message instead of raising an error.
candidates <- c(
    "prodindx", "vs", "firms", "ne", "nfe", "payroll", "inv", "gdp",
    "mTrains", "x", "taru", "tarw", "taruw", "tarww", "el", "elt", "cap",
    "tlines", "gini", "gdp95", "ppp95", "ppp", "aidpc", "debtserv", "unemp",
    "mil_g", "immig", "fdi_gdp", "newspc", "debt_gdp", "system", "yrsoffc",
    "finittrm", "yrcurnt", "multpl", "military", "defmin", "percent1",
    "percentl", "prtyin", "execnat", "execrurl", "execreg", "execage",
    "herfgov", "numgov", "fgovseat", "fgovnat", "fgovreg", "fgovage",
    "sgovseat", "sgovnat", "sgovrurl", "sgovreg", "sgovage", "tgovseat",
    "tgovnat", "tgovrurl", "tgovreg", "tgovage", "govoth", "govothst",
    "herfopp", "numopp", "foppseat", "foppnat", "fopprurl", "foppreg",
    "foppage", "soppseat", "toppseat", "oppoth", "oppothst", "ulprty",
    "numul", "herftot", "oppmajh", "oppmajs", "dateexec", "dateleg",
    "partyage", "exelec", "execspec", "govspec", "coalspec", "liec",
    "sensys", "dhondt", "cl", "fraud", "tenlong", "tenshort", "tensys",
    "polariz", "stabs", "stabns", "auton", "muni", "author", "w", "mehat",
    "tw", "vahat"
)

## Candidates kept for the imputation model
retained <- c("vs", "payroll", "gdp", "x")

## The row count does not change while columns are being removed
n_rows <- nrow(gko2009)

for (candidate in candidates) {
    values <- gko2009[[candidate]]
    if (is.null(values)) {
        message("Skipping '", candidate, "': no such column in gko2009")
        next
    }

    ## Label the output so each set of numbers can be traced to its variable
    cat("\n## ", candidate, "\n", sep = "")
    ## print() is required: results are not auto-printed inside a loop
    print(round(cor(values, analysis, use = "pairwise.complete.obs"), 2))
    print(round(cor(values, missing, use = "pairwise.complete.obs"), 2))
    print(sum(is.na(values)) / n_rows * 100)

    if (!(candidate %in% retained)) {
        gko2009[[candidate]] <- NULL
    }
}

## Imputation
## Append a running row number as the last column.  seq_len() is used
## instead of 1:nrow() so that a data frame without rows yields an empty
## vector rather than c(1, 0).
## NOTE(review): `case` is not declared as an id variable in the amelia()
## call below, so it enters the imputation model like any other column --
## confirm that this is intended.
case <- seq_len(nrow(gko2009))
gko2009 <- cbind(gko2009, case)
head(gko2009)
dim(gko2009)

## What is average percentage of missing data?
## NAs(x): number of missing values in each column of `x` (a data frame or
## matrix), returned as a plain integer vector without names.
NAs <- function(x) {
    count_missing <- function(column) {
        sum(is.na(column))
    }
    per_column <- apply(x, 2, count_missing)
    as.vector(per_column)
}
## Missing values per column, then the average percentage across columns
NAs(gko2009)
mean(NAs(gko2009) / nrow(gko2009)) * 100

## Bounds: restrict left3 to the range [0, 1000]
dim(gko2009)
head(gko2009)

## Each row of the bounds matrix is c(column number, lower, upper).
## The column number of left3 is looked up by name instead of being
## hard-coded, so the bound cannot silently attach to another variable
## when the set of columns changes.
left3_col <- which(colnames(gko2009) == "left3")
stopifnot(length(left3_col) == 1)
bds <- matrix(c(left3_col, 0, 1000), nrow = 1, ncol = 3)
bds

## Thus: 5 imputations

## The literal 02138 is read as the number 2138
set.seed(02138)
gko2009.out <- amelia(
    gko2009,
    m = 5,
    cs = "ccode",
    ts = "year",
    polytime = 3,
    lags = c("left3"),
    empri = 0.01 * nrow(gko2009),
    bounds = bds,
    max.resample = 1000
)

## Write the imputed data sets to a single Stata file
write.amelia(
    obj = gko2009.out,
    file.stem = "GKO2009 IO Imp Data",
    format = "dta",
    separate = FALSE
)